{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.keras.layers import LSTM\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM层的参数\n",
    "\n",
    "LSTM的第1个参数memory units是指hidden state的维度，也是输出向量的维度。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## stateful\n",
    "\n",
    "[?]  \n",
    "默认情况下，LSTM的memory每个mini-batch会reset一次。  \n",
    "设置参数`stateful=True`以后就不会自动reset。由开发者手动reset。    \n",
    "以下情况会需要用到这一功能：  \n",
    "1. batch_size = 1  \n",
    "2. 一个长序列分成了几个子序列  \n",
    "3. 一个非常长的序列所以要考虑效率问题  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## go_backwords\n",
    "\n",
    "定义LSTM的方向，默认为False（正向），设置为True则是反向。  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM作为输入层\n",
    "\n",
    "LSTM层要求的输入数据是三维的，分别代表样本数、时间步数、特征数。   \n",
    "对应的，LSTM层也要通过参数`input_shape`定义可以接受的输入数据的形状。input_shape的参数代表输入数据的时间步数和特征数。  \n",
    "默认的activation是linear。LSTM的第1个参数是输出向量的维度。   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.layers.recurrent_v2.LSTM at 0x63f97b210>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "\n",
    "data = np.array([1,2,3,4,5,6,7,8,9,10])\n",
    "data = data.reshape(1, 10, 1)\n",
    "tf.keras.layers.LSTM(5, input_shape=(10, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.layers.recurrent_v2.LSTM at 0x64074c710>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "data = np.array([[1,2],[3,4],[5,6],[7,8],[9,10]])\n",
    "data = data.reshape(1, 5, 2)\n",
    "tf.keras.layers.LSTM(5, input_shape=(5, 1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM作为隐藏层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "vocab_size = 10000\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Embedding(vocab_size, 64),\n",
    "    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 两个连续的LSTM层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "vocab_size = 10000\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Embedding(vocab_size, 64),\n",
    "    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),\n",
    "    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 双向LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "vocab_size = 10000\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Embedding层\n",
    "\n",
    "LSTM之前通常要加入Embedding  \n",
    "Embedding是指把编码转成指定维度的向量，使具有相近语义的单词其向量也是相近的  \n",
    "Embedding通常作为输入层，且不需要定义input_shape，输入数据是二维的，第一维是样本数，第二维是样本序列。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential_2\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding (Embedding)        (None, 10, 30)            3000      \n",
      "_________________________________________________________________\n",
      "lstm (LSTM)                  (None, 10, 128)           81408     \n",
      "_________________________________________________________________\n",
      "lstm_1 (LSTM)                (None, 128)               131584    \n",
      "_________________________________________________________________\n",
      "dense (Dense)                (None, 5)                 645       \n",
      "=================================================================\n",
      "Total params: 216,637\n",
      "Trainable params: 216,637\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "vocab_size = 100 # 单词表中有99个单词，There is one additional row to handle \"unknown\" words.\n",
    "embedding_dim = 30 # 输出向量为30维\n",
    "max_length = 10 # 每个样本的长度\n",
    "model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))\n",
    "model.add(tf.keras.layers.LSTM(units=128, return_sequences=True,dropout=0.5))\n",
    "model.add(tf.keras.layers.LSTM(units=128, return_sequences=False,dropout=0.5))\n",
    "model.add(tf.keras.layers.Dense(units=5, activation='softmax'))\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 用预训练参数初始化Embedding层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'pre_trained/glove.6B.50d.txt'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-5-b0ac3fab847b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     66\u001b[0m \u001b[0membedding_dim\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m30\u001b[0m \u001b[1;31m# 输出向量为30维\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     67\u001b[0m \u001b[0mmax_length\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m10\u001b[0m \u001b[1;31m# 每个样本的长度\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 68\u001b[1;33m \u001b[0mword_to_index\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex_to_word\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mword_to_vec_map\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mread_glove_vecs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pre_trained/glove.6B.50d.txt'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     69\u001b[0m \u001b[0membedding_layer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpretrained_embedding_layer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mword_to_vec_map\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mword_to_index\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     70\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0membedding_layer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-5-b0ac3fab847b>\u001b[0m in \u001b[0;36mread_glove_vecs\u001b[1;34m(glove_file)\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mread_glove_vecs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mglove_file\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mglove_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'r'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'utf-8'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m         \u001b[0mwords\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m         \u001b[0mword_to_vec_map\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'pre_trained/glove.6B.50d.txt'"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "def read_glove_vecs(glove_file):\n",
    "    with open(glove_file, 'r',encoding='utf-8') as f:\n",
    "        words = set()\n",
    "        word_to_vec_map = {}\n",
    "        for line in f:\n",
    "            line = line.strip().split()\n",
    "            curr_word = line[0]\n",
    "            words.add(curr_word)\n",
    "            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)\n",
    "\n",
    "        i = 1\n",
    "        words_to_index = {}\n",
    "        index_to_words = {}\n",
    "        for w in sorted(words):\n",
    "            words_to_index[w] = i\n",
    "            index_to_words[i] = w\n",
    "            i = i + 1\n",
    "    return words_to_index, index_to_words, word_to_vec_map\n",
    "\n",
    "def pretrained_embedding_layer(word_to_vec_map, word_to_index):\n",
    "    \"\"\"\n",
    "    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.\n",
    "\n",
    "    Arguments:\n",
    "    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.\n",
    "    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)\n",
    "\n",
    "    Returns:\n",
    "    embedding_layer -- pretrained layer Keras instance\n",
    "    \"\"\"\n",
    "\n",
    "    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)\n",
    "    emb_dim = word_to_vec_map[\"cucumber\"].shape[0]  # define dimensionality of your GloVe word vectors (= 50)\n",
    "\n",
    "    ### START CODE HERE ###\n",
    "    # Step 1\n",
    "    # Initialize the embedding matrix as a numpy array of zeros.\n",
    "    # See instructions above to choose the correct shape.\n",
    "    emb_matrix = np.zeros((vocab_len, emb_dim))\n",
    "\n",
    "    # Step 2\n",
    "    # Set each row \"idx\" of the embedding matrix to be\n",
    "    # the word vector representation of the idx'th word of the vocabulary\n",
    "    for word, idx in word_to_index.items():\n",
    "        emb_matrix[idx, :] = word_to_vec_map[word]\n",
    "\n",
    "    # Step 3\n",
    "    # Define Keras embedding layer with the correct input and output sizes\n",
    "    # Make it non-trainable.\n",
    "    embedding_layer = tf.keras.layers.Embedding(vocab_len, emb_dim, trainable=False)\n",
    "    ### END CODE HERE ###\n",
    "\n",
    "    # Step 4 (already done for you; please do not modify)\n",
    "    # Build the embedding layer, it is required before setting the weights of the embedding layer.\n",
    "    embedding_layer.build((None,))  # Do not modify the \"None\".  This line of code is complete as-is.\n",
    "\n",
    "    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.\n",
    "    embedding_layer.set_weights([emb_matrix])\n",
    "\n",
    "    return embedding_layer\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "vocab_size = 100 # 单词表中有99个单词，There is one additional row to handle \"unknown\" words.\n",
    "embedding_dim = 30 # 输出向量为30维\n",
    "max_length = 10 # 每个样本的长度\n",
    "word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('pre_trained/glove.6B.50d.txt')\n",
    "embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)\n",
    "model.add(embedding_layer)\n",
    "model.add(tf.keras.layers.LSTM(units=128, return_sequences=True,dropout=0.5))\n",
    "model.add(tf.keras.layers.LSTM(units=128, return_sequences=False,dropout=0.5))\n",
    "model.add(tf.keras.layers.Dense(units=5, activation='softmax'))\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
